import csv
import time

import requests
from bs4 import BeautifulSoup

# 模拟浏览器请求头
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# 定义爬取函数
def crawl_douban():
    base_url = 'https://movie.douban.com/top250'
    movies = []
    for page in range(0, 250, 25):
        url = f'{base_url}?start={page}'
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # 提取每个电影条目
        items = soup.find_all('div', class_='item')
        for item in items:
            title = item.find('span', class_='title').text
            rating = item.find('span', class_='rating_num').text
            info = item.find('div', class_='bd').p.text.strip()
            movies.append({
                'title': title,
                'rating': rating,
                'info': info
            })
    
    # 保存为CSV
    with open('douban_top250.csv', 'w', encoding='utf-8', newline='') as f:
        fieldnames = ['title', 'rating', 'info']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(movies)
    print(f"成功爬取{len(movies)}条数据！")

if __name__ == '__main__':
    crawl_douban()